﻿# 遍历pdf文件，对有pdf文件但是数据库中没有记录的文件删除
import os
import pymysql
# MySQL connection settings.
#
# Every value can be overridden by an environment variable with the same name
# as its key (e.g. MYSQL_PASSWD).  The literals below are only fallbacks that
# keep existing deployments working.
# SECURITY NOTE(review): the fallback credentials (root account, plaintext
# password) are committed to source control.  Rotate them and supply the real
# values through the environment instead, then drop the fallbacks.
MYSQL_CONNECT = {
    'MYSQL_HOST': os.environ.get('MYSQL_HOST', '114.115.219.219'),
    # Environment values are strings, so the port is coerced to int.
    'MYSQL_PORT': int(os.environ.get('MYSQL_PORT', 3306)),
    'MYSQL_DB': os.environ.get('MYSQL_DB', 'globallegaldatabase'),
    'MYSQL_USER': os.environ.get('MYSQL_USER', 'root'),
    'MYSQL_PASSWD': os.environ.get('MYSQL_PASSWD', 'xinanzhengfadaqxueapp03!'),
    'MYSQL_CHARSET': os.environ.get('MYSQL_CHARSET', 'utf8'),
    # Table that stores the case records.
    'MYSQL_TABLE': os.environ.get('MYSQL_TABLE', 'lawcasetext'),
}


def file_name(file_dir, mysql_country, urlcursor, i, *,
              report_path='D:/GlobalLawFiles/nocasesql.txt', table=None):
    """Report files under *file_dir* that have no matching database record.

    Selects ``SYS_FLD_DIGITFILENAME`` for every row whose ``SortA`` column
    equals *mysql_country*, walks *file_dir* bottom-up, and appends one
    numbered line to the report for each file whose bare name is not among
    the selected values.

    NOTE: nothing is deleted.  The original deletion step
    (``os.remove`` on the orphaned path) was disabled in the source, so this
    function is a dry run that only produces the report.

    Args:
        file_dir: Root directory to scan recursively.
        mysql_country: Value compared against the ``SortA`` column.
        urlcursor: Open database cursor; must accept ``execute(sql, args)``
            with ``%s`` placeholders (PyMySQL style) and ``fetchall()``.
        i: Last report line number already used; numbering continues at
            ``i + 1``.
        report_path: File the report lines are appended to (UTF-8).
        table: Table to query.  Defaults to ``MYSQL_CONNECT['MYSQL_TABLE']``.

    Returns:
        The last report line number used, i.e. *i* plus the number of files
        reported.  If the query fails, the error is printed, no scan is
        performed and *i* is returned unchanged.
    """
    if table is None:
        table = MYSQL_CONNECT['MYSQL_TABLE']

    # A table name cannot be a bound parameter, so it is formatted in; the
    # country value is bound by the driver, which handles quoting/escaping.
    sql = "SELECT SYS_FLD_DIGITFILENAME FROM {} WHERE SortA = %s".format(table)
    params = (mysql_country,)
    print(sql, params)
    try:
        urlcursor.execute(sql, params)
        rows = urlcursor.fetchall()
    except Exception as e:
        print('mysql查找数据异常：' + str(e))
        # Without the list of known files every file on disk would be
        # reported as orphaned, so abort instead of scanning.
        return i

    # Flatten the result rows into a set for O(1) membership tests.
    known_files = {value for row in rows for value in row}

    processed = 0
    # Open the report once per call rather than once per orphaned file.
    with open(report_path, 'a', encoding='utf-8') as report:
        for root, _dirs, files in os.walk(file_dir, topdown=False):
            print(root)  # directory currently being scanned
            for pdffile in files:
                processed += 1
                print("\r已处理: %d" % processed, end="")
                if pdffile in known_files:
                    continue
                i += 1
                report.write(str(i) + '    ' + os.path.join(root, pdffile) + '\n')
    return i


# Driver script: for each configured country, compare the files on disk with
# the database and append the files lacking a record to the report.

# SortA codes to process, in processing order.
# Country = ['LAWCOUNTRYYDNXY']  # single-country run, kept for debugging
Country = ['LAWCOUNTRYXJP', 'LAWCOUNTRYFLB', 'LAWCOUNTRYWL', 'LAWCOUNTRYYDNXY', 'LAWCOUNTRYMLXY',
           'LAWCOUNTRYTG', 'LAWCOUNTRYYN', 'LAWCOUNTRYMD', 'LAWCOUNTRYLW', 'LAWCOUNTRYJPZ']
# SortA code -> sub-directory name under D:\GlobalLawFiles\Case.
Countrypath = {'LAWCOUNTRYXJP': 'Singapore', 'LAWCOUNTRYFLB': 'Philippines', 'LAWCOUNTRYWL': 'Brunei',
               'LAWCOUNTRYYDNXY': 'Indonesia', 'LAWCOUNTRYTG': 'Thailand', 'LAWCOUNTRYMLXY': 'Malaysia',
               'LAWCOUNTRYYN': 'Vietnam', 'LAWCOUNTRYMD': 'Myanmar', 'LAWCOUNTRYLW': 'Laos',
               'LAWCOUNTRYJPZ': 'Cambodia'}

# Connect to the database using the shared settings.
host = MYSQL_CONNECT['MYSQL_HOST']
port = MYSQL_CONNECT['MYSQL_PORT']
db = MYSQL_CONNECT['MYSQL_DB']
user = MYSQL_CONNECT['MYSQL_USER']
password = MYSQL_CONNECT['MYSQL_PASSWD']
# `database=` / `password=` replace the deprecated `db=` / `passwd=` aliases,
# and the charset is read from the settings instead of being repeated here.
urlconn = pymysql.connect(host=host, port=port, database=db, user=user, password=password,
                          charset=MYSQL_CONNECT['MYSQL_CHARSET'])
try:
    urlcursor1 = urlconn.cursor()
    try:
        for sqlcountry in Country:
            countrypath = Countrypath[sqlcountry]
            print(countrypath)
            path = "D:\\GlobalLawFiles\\Case\\%s" % countrypath
            # path = "D:\\GlobalLawFiles\\Case\\Indonesia\\putusan\\PidanaUmum"  # narrow run for debugging
            n = 0  # report numbering restarts for every country
            file_name(path, sqlcountry, urlcursor1, n)
            # Mark the end of this country's section in the report.
            with open('D:/GlobalLawFiles/nocasesql.txt', 'a', encoding='utf-8') as f:
                f.write('清洗结束:' + path + '\n')
    finally:
        # Release the cursor even if a scan raised.
        urlcursor1.close()
finally:
    # Always close the connection, including on error.
    urlconn.close()
